library(dplyr)
library(ggplot2)
library(purrr)
library(tibble)
library(tidyr)
library(stringr)

Data visualization

In this notebook, the average tensor values extracted across ROIs are plotted before filtering and aggregation of the data. All regions are included.

Abbreviations:

  • White Matter (WM)

  • Grey Matter (GM)

  • Cerebrospinal Fluid (CSF)

  • Ventricles (VN)

  • Fractional Anisotropy (FA)

  • Mean Diffusivity (MD)

  • Axial Diffusivity (AD)

  • Radial Diffusivity (RD)

Colors in the plots represent distinct subjects.

# Read the per-ROI tensor averages; `roi` is forced to character so the labels
# can later be recoded by name.
seg_df <- read.csv("avg_tensor_by_roi.csv", colClasses = c(roi = "character"))

# Derive subject, site and visit from the `sub` identifier and place them
# directly after it:
#   subject - the five digits following "sub-"
#   site    - the run of non-digits following "sub-" plus five digits
#   visit   - the two digits at the very end of the identifier
seg_df <- mutate(
  seg_df,
  subject = str_extract(sub, "(?<=sub-)\\d{5}"),
  site = str_extract(sub, "(?<=sub-\\d{5})\\D+"),
  visit = str_extract(sub, "[0-9]{2}$"),
  .after = sub
)

# Drop the first column by position.
# NOTE(review): presumably the row-index column written with the CSV - confirm.
seg_df <- seg_df[-1]

# ROI dictionary, restricted to ROI indices 1 through 207.
jlf_dict <- readxl::read_xlsx("/home/vgonzenb/MSKIDS/data/MUSE_ROI_Dict.xlsx")
jlf_dict <- filter(jlf_dict, ROI_INDEX %in% 1:207)
#' Name ROIs according to segmentation
#'
#' Recodes the `roi` column of `df` with the label scheme that belongs to
#' `seg_type`, so that every segmentation can use its own label-to-name
#' mapping. Labels that have no entry in the chosen scheme are left unchanged.
#' The `jlfseg_WMGM` scheme is built from the `jlf_dict` object defined in this
#' file (`ROI_INDEX` -> `ROI_NAME`).
#'
#' For usage, split or filter a data.frame by unique segmentations and indicate
#' the segmentation type.
#'
#' @param df a data.frame corresponding to data from one segmentation type
#'   only; must contain a character `roi` column
#' @param seg_type the segmentation type: 'atropos', 'fast', 'first',
#'   'jlfseg_WMGM', or 'jlfseg_thal'
#' @return a data.frame with a recoded roi column
#' @examples
#' \dontrun{
#' # all segmentations at once
#' multiseg_df %>%
#'   split(multiseg_df$segmentation) %>%
#'   purrr::imap(name_rois) %>%
#'   dplyr::bind_rows()
#'
#' # a single segmentation
#' multiseg_df %>% filter(segmentation == seg_type) %>% name_rois(seg_type)
#' }
name_rois <- function(df, seg_type) {
  # atropos and fast share the same three tissue classes
  tissue_classes <- c(`1` = "CSF", `2` = "GM", `3` = "WM")

  roi_names_by_seg_type <- list(
    atropos = tissue_classes,
    fast = tissue_classes,
    first = c(
      `10` = "L. Thalamus",
      `11` = "L. Caudate",
      `12` = "L. Putamen",
      `13` = "L. Pallidum",
      `16` = "Brain-Stem/4th Ventricle",
      `17` = "L. Hippocampus",
      `18` = "L. Amygdala",
      `26` = "L. Accumbens-area",
      `49` = "R. Thalamus",
      `50` = "R. Caudate",
      `51` = "R. Putamen",
      `52` = "R. Pallidum",
      `53` = "R. Hippocampus",
      `54` = "R. Amygdala",
      `58` = "R. Accumbens-area"
    ),
    jlfseg_WMGM = setNames(jlf_dict$ROI_NAME, jlf_dict$ROI_INDEX),
    jlfseg_thal = c(`1` = "Thalamus")
  )

  roi_names <- roi_names_by_seg_type[[seg_type]]

  # An unknown segmentation would otherwise surface as an opaque recode() error.
  if (is.null(roi_names)) {
    stop(
      "Unknown segmentation type '", seg_type, "'. Expected one of: ",
      paste(names(roi_names_by_seg_type), collapse = ", "),
      call. = FALSE
    )
  }

  df %>%
    mutate(roi = recode(roi, !!!roi_names))
}

# Recode the ROI labels one segmentation at a time (imap passes the
# segmentation name as `seg_type`), then stack the pieces back together.
seg_df <- bind_rows(
  imap(
    split(seg_df, seg_df$segmentation),
    name_rois
  )
)
#' Add a tissue column derived from the (already named) ROI column
#'
#' Creates a `tissue` column, placed right after `roi`, by recoding the ROI
#' names with the tissue scheme that belongs to `seg_type`. ROI names without
#' an entry in the scheme are carried over unchanged. The `jlfseg_WMGM` scheme
#' is built from the `jlf_dict` object defined in this file
#' (`ROI_NAME` -> `TISSUE_SEG`).
#'
#' @param df a data.frame from one segmentation type only, whose `roi` column
#'   holds ROI names
#' @param seg_type the segmentation type: 'atropos', 'fast', 'first',
#'   'jlfseg_WMGM', or 'jlfseg_thal'
#' @return `df` with an additional `tissue` column after `roi`
add_tissue_col <- function(df, seg_type) {
  # atropos and fast ROIs already are tissue classes, so they map to themselves
  tissue_classes <- setNames(nm = c("CSF", "GM", "WM"))

  tissues_by_seg_type <- list(
    atropos = tissue_classes,
    fast = tissue_classes,
    # Keys must match the ROI names exactly, including the space after
    # "L."/"R."; without it the recode never matches the thalamus rows.
    first = c(
      "L. Thalamus" = "Thalamus",
      "R. Thalamus" = "Thalamus"
    ),
    jlfseg_WMGM = setNames(jlf_dict$TISSUE_SEG, jlf_dict$ROI_NAME),
    jlfseg_thal = c("Thalamus" = "Thalamus")
  )

  tissue_type <- tissues_by_seg_type[[seg_type]]

  # An unknown segmentation would otherwise surface as an opaque recode() error.
  if (is.null(tissue_type)) {
    stop(
      "Unknown segmentation type '", seg_type, "'. Expected one of: ",
      paste(names(tissues_by_seg_type), collapse = ", "),
      call. = FALSE
    )
  }

  df %>%
    mutate(tissue = recode(roi, !!!tissue_type), .after = "roi")
}

# Add the tissue column one segmentation at a time (imap passes the
# segmentation name as `seg_type`), then stack the pieces back together.
seg_df <- bind_rows(
  imap(
    split(seg_df, seg_df$segmentation),
    add_tissue_col
  )
)
#' Plot tensor values per ROI for one subset of the data
#'
#' Draws one horizontal box plot per ROI (outliers hidden) with the individual
#' observations jittered on top and coloured by subject. The legend is
#' suppressed.
#'
#' @param mini_df a data.frame with `roi`, `subject` and `values` columns
#' @param title accepted so that the function can be used with `purrr::imap()`,
#'   which passes the element name as second argument; it is not drawn on the
#'   plot
#' @return a ggplot object
plot_seg <- function(mini_df, title = NULL) {
  ggplot(mini_df, aes(x = roi, y = values, group = roi, color = subject)) +
    geom_boxplot(outlier.shape = NA, alpha = 0.4) +
    geom_jitter() +
    coord_flip() +
    theme(legend.position = "none")
}

# One plot per segmentation x tensor map, for every segmentation except
# jlfseg_WMGM.
plot_list <- seg_df %>%
  filter(segmentation != "jlfseg_WMGM") %>%
  {
    split(., list(.$segmentation, .$tensormap))
  } %>%
  imap(plot_seg)

# Emit a markdown header followed by the plot for each list element. The list
# names look like "<segmentation>.<tensormap>"; the first "." and the first "_"
# are turned into spaces and the result is upper-cased.
iwalk(plot_list, function(seg_plot, plot_name) {
  header <- str_to_upper(
    str_replace(str_replace(plot_name, "\\.", " "), "_", " ")
  )
  cat("###", header, "\n")
  print(seg_plot)
  cat("\n\n")
})

ATROPOS AD

FAST AD

FIRST AD

JLFSEG THAL AD

ATROPOS FA

FAST FA

FIRST FA

JLFSEG THAL FA

ATROPOS MD

FAST MD

FIRST MD

JLFSEG THAL MD

ATROPOS RD

FAST RD

FIRST RD

JLFSEG THAL RD

# JLF plots for every tissue except GM, one plot per tissue x tensor map.
# ROIs 46, 63, 64 and 69 are left out.
plot_list <- seg_df %>%
  filter(
    segmentation == "jlfseg_WMGM",
    !roi %in% c("46", "63", "64", "69"),
    tissue != "GM"
  ) %>%
  {
    split(., list(.$tissue, .$tensormap))
  } %>%
  imap(plot_seg)

# Emit a markdown header followed by the plot for each list element. The list
# names look like "<tissue>.<tensormap>"; the first "." becomes a space and the
# result is upper-cased.
iwalk(plot_list, function(tissue_plot, plot_name) {
  header <- str_to_upper(str_replace(plot_name, "\\.", " "))
  cat("###", "JLF", header, "\n")
  print(tissue_plot)
  cat("\n\n")
})

JLF 0 AD

JLF NONE AD

JLF VN AD

JLF WM AD

JLF 0 FA

JLF NONE FA

JLF VN FA

JLF WM FA

JLF 0 MD

JLF NONE MD

JLF VN MD

JLF WM MD

JLF 0 RD

JLF NONE RD

JLF VN RD

JLF WM RD

JLF GM

# JLF grey-matter plots. There are too many GM ROIs for a single figure, so
# they are spread over pages of `rois_per_page` ROIs, separately for each
# tensor map. ROIs 46, 63, 64 and 69 are left out.
gm_df <- seg_df %>%
  filter(
    segmentation == "jlfseg_WMGM",
    !roi %in% c("46", "63", "64", "69"),
    tissue == "GM"
  )

# Derive the page from the ROI itself (in order of first appearance) instead of
# a fixed 119-element vector, which only lines up with the ROIs when every
# subject / tensor map contributes the same 119 ROIs in the same row order.
rois_per_page <- 20
roi_page <- (match(gm_df$roi, unique(gm_df$roi)) - 1) %/% rois_per_page + 1

plot_list <- gm_df %>%
  split(list(roi_page, gm_df$tensormap)) %>%
  purrr::imap(plot_seg)

# List names look like "<page>.<tensormap>"; the page prefix is stripped so the
# header only carries the tensor map.
for (plot_name in names(plot_list)) {
  header <- plot_name %>%
    str_replace("[0-9]+\\.", "") %>%
    str_to_upper()
  cat("###", "JLF GM", header, "\n")
  print(plot_list[[plot_name]])
  cat("\n\n")
}

JLF GM AD

JLF GM AD

JLF GM AD

JLF GM AD

JLF GM AD

JLF GM AD

JLF GM FA

JLF GM FA

JLF GM FA

JLF GM FA

JLF GM FA

JLF GM FA

JLF GM MD

JLF GM MD

JLF GM MD

JLF GM MD

JLF GM MD

JLF GM MD

JLF GM RD

JLF GM RD

JLF GM RD

JLF GM RD

JLF GM RD

JLF GM RD

Missing Info on Labels

Excluding non-brain (label 0) and the segmentations that are irrelevant for FIRST, JLF shows four labels (46, 63, 64, 69) that are not in the current data dictionary. This might lead to missing information on the corresponding regions.

# Show the labels that were never recoded: rows whose `tissue` is still a bare
# number. One example row is kept per segmentation / label, and only the FA
# rows are used to keep the table small.
seg_df %>%
  mutate(exclude = str_detect(tissue, "^[0-9]+$")) %>%
  filter(exclude, tensormap == "FA") %>%
  group_by(segmentation, tissue) %>%
  # keep the first row of each group, unless it is label 0
  filter(row_number() == 1, roi != "0") %>%
  # drop the grouping first; otherwise select() silently re-adds `tissue`
  ungroup() %>%
  select(-tensormap, -tissue)
## Adding missing grouping variables: `tissue`
# Rows whose `tissue` is purely numeric AND is one of NONE / CSF / VN.
# NOTE(review): these two conditions cannot both hold (a value made only of
# digits is never "NONE", "CSF" or "VN"), so this always returns zero rows.
# Confirm which filter was intended.
filter(
  mutate(seg_df, exclude = str_detect(tissue, "^[0-9]+$")),
  exclude,
  tissue %in% c("NONE", "CSF", "VN")
)